import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import chart_studio.plotly as py
from IPython.display import IFrame
from datetime import datetime
from plotly.offline import init_notebook_mode, iplot
# Switch Plotly (plotly.offline, imported above) into notebook mode so that
# the iplot(...) calls further down can render inside the notebook.
init_notebook_mode()
# IPython magic, not Python syntax: this line only works inside a
# Jupyter/IPython session.
%matplotlib inline
# Global time-series CSVs from the CSSEGISandData/COVID-19 repository.
confirmed_cases_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
deaths_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
cured_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"

# Download the three tables in one pass.
confirmed_cases, deaths_data, recovered_cases = (
    pd.read_csv(csv_url)
    for csv_url in (confirmed_cases_path, deaths_path, cured_path)
)

# Quick previews, kept from the original notebook cells.
confirmed_cases.head()
deaths_data.head()
recovered_cases.head()

# Everything from the fifth column onwards is treated as a per-day column
# (the first four are presumably location metadata -- TODO confirm).
days_columns = confirmed_cases.columns[4:]

# World-wide total per day: sum every row for each day column, then turn the
# resulting Series into a two-column frame.
daily_world_totals = confirmed_cases.loc[:, days_columns].sum(axis=0)
world_cases_growth = daily_world_totals.reset_index()
world_cases_growth.columns = ['Date', 'Count']
def isweekend(date) -> bool:
    """Return True when *date* falls on a Saturday or Sunday.

    Args:
        date: Any single value ``pd.to_datetime`` can parse into a timestamp,
            e.g. a ``'1/25/20'`` month/day/year string.

    Returns:
        ``True`` for ISO weekdays 6 (Saturday) and 7 (Sunday), ``False``
        for Monday through Friday.
    """
    # Ask the parsed timestamp directly instead of routing the call through
    # the unbound ``datetime.isoweekday``.
    return pd.to_datetime(date).isoweekday() > 5
# Flag every date as weekend (1) or weekday (0).
weekend_flags = world_cases_growth['Date'].apply(isweekend)
world_cases_growth['isweekend'] = weekend_flags.astype(int)

# Cumulative confirmed cases: stem plot with a dashed line drawn over it.
report_dates = world_cases_growth['Date']
confirmed_totals = world_cases_growth['Count']

plt.rcParams['figure.figsize'] = [20, 10]
plt.stem(report_dates, confirmed_totals, '--ro')
plt.plot(report_dates, confirmed_totals, '--bo')
plt.title("Spread of virus per each day")
plt.xticks(report_dates, rotation=90)
# One y tick per observed total.
plt.yticks(confirmed_totals)
plt.show()
# Day-over-day change in confirmed cases. The first day has no predecessor,
# so its delta is set to that day's own total.
world_cases_growth['delta_confirmed'] = world_cases_growth['Count'].diff()
world_cases_growth.loc[0, 'delta_confirmed'] = world_cases_growth.loc[0, 'Count']

plt.rcParams['figure.figsize'] = [20, 10]
plt.stem(world_cases_growth['Date'], world_cases_growth['delta_confirmed'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['delta_confirmed'], '--ro')

# Label every point with its integer value, nudged just above the marker.
for report_date, new_cases in zip(world_cases_growth['Date'],
                                  world_cases_growth['delta_confirmed'].to_numpy()):
    plt.text(report_date, new_cases + 2, new_cases.astype(int))

plt.title("Delta new cases of to Covid-19 identified on each day")
plt.xticks(world_cases_growth['Date'], rotation=90)
plt.show()
# World-wide cumulative deaths per day; the date index is dropped so the
# values line up with world_cases_growth's positional index.
world_death_totals = deaths_data[days_columns].sum(axis=0)
world_cases_growth['deaths'] = world_death_totals.reset_index(drop=True)

plt.rcParams['figure.figsize'] = [20, 10]
plt.stem(world_cases_growth['Date'], world_cases_growth['deaths'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['deaths'], '--ro')

# Label every point with its value, placed a little above the marker.
for report_date, death_total in zip(world_cases_growth['Date'],
                                    world_cases_growth['deaths'].to_numpy()):
    plt.text(report_date, death_total + 50, death_total)

plt.title("Growth of death toll due to Covid-19 per each day")
plt.xticks(world_cases_growth['Date'], rotation=90)
plt.show()
The plot above shows the cumulative total (the previous day's count plus the newly added count), so next we look at the day-over-day change instead.
# Day-over-day change in deaths. The first day has no predecessor, so its
# delta is set to that day's own total.
world_cases_growth['delta_deaths'] = world_cases_growth['deaths'].diff()
world_cases_growth.loc[0, 'delta_deaths'] = world_cases_growth.loc[0, 'deaths']

plt.rcParams['figure.figsize'] = [20, 10]
plt.stem(world_cases_growth['Date'], world_cases_growth['delta_deaths'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['delta_deaths'], '--ro')

# Label every point with its integer value, nudged just above the marker.
for report_date, new_deaths in zip(world_cases_growth['Date'],
                                   world_cases_growth['delta_deaths'].to_numpy()):
    plt.text(report_date, new_deaths + 2, new_deaths.astype(int))

plt.title("# of deaths due to Covid-19 per each day")
plt.xticks(world_cases_growth['Date'], rotation=90)
plt.show()
# World-wide cumulative recoveries per day; the date index is dropped so the
# values line up with world_cases_growth's positional index.
world_recovered_totals = recovered_cases[days_columns].sum(axis=0)
world_cases_growth['recovered'] = world_recovered_totals.reset_index(drop=True)

plt.rcParams['figure.figsize'] = [20, 10]
plt.stem(world_cases_growth['Date'], world_cases_growth['recovered'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['recovered'], '--go')

# Label every point with its value, placed a little above the marker.
for report_date, recovered_total in zip(world_cases_growth['Date'],
                                        world_cases_growth['recovered'].to_numpy()):
    plt.text(report_date, recovered_total + 50, recovered_total)

plt.title("# of people that recovered from Covid-19 virus")
plt.xticks(world_cases_growth['Date'], rotation=90)
plt.show()
# Three panels sharing one figure: confirmed, deaths and recovered totals.
plt.rcParams['figure.figsize'] = [20, 10]
plt.figure(1)

# (subplot position, column, line format, legend label) for each panel.
# 'Count' holds the cumulative confirmed total (active cases are derived
# separately), so it is labelled as confirmed rather than active.
panel_specs = [
    (311, 'Count', '--bo', 'CONFIRMED CASES'),
    (312, 'deaths', '--ro', 'DEATHS'),
    (313, 'recovered', '--go', 'CURED'),
]
for subplot_position, column_name, line_format, legend_label in panel_specs:
    plt.subplot(subplot_position)
    plt.plot(world_cases_growth['Date'], world_cases_growth[column_name],
             line_format, label=legend_label)
    plt.xticks(world_cases_growth['Date'], rotation=90)
    plt.legend()
plt.show()
# Three bar series drawn over each other from the same zero baseline
# (no `bottom=` offset is passed), in the order confirmed, recovered, deaths.
bar_dates = world_cases_growth['Date']
p1 = plt.bar(bar_dates, world_cases_growth['Count'], color='yellow')
p2 = plt.bar(bar_dates, world_cases_growth['recovered'], color='g')
p3 = plt.bar(bar_dates, world_cases_growth['deaths'], color='r')
plt.xticks(bar_dates, rotation=90)
# One legend handle per series: the first bar of each container.
legend_handles = [p1[0], p2[0], p3[0]]
plt.legend(legend_handles, ('Confirmed', 'Recovered', 'Deaths'))
plt.xlabel("Date")
plt.title("Stacked diagram of COVID-19 cases");

# Ratio of the latest cumulative deaths to the latest cumulative confirmed cases.
world_cases_growth['deaths'].iloc[-1] / world_cases_growth['Count'].iloc[-1]
# Per-country cumulative confirmed counts: add up all rows that share a
# 'Country/Region' for every day column. GroupBy.sum() is used directly
# because passing np.sum to agg() is deprecated in current pandas.
con_cases_growth = (
    confirmed_cases.groupby('Country/Region')[days_columns]
    .sum()
    .reset_index()
)
con_cases_growth.head()
import requests
from bs4 import BeautifulSoup

# Download the GitHub directory listing of the daily reports and parse it.
html_response = requests.get("https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports?_pjax=%23js-repo-pjax-container")
soup = BeautifulSoup(html_response.content, "html.parser")

# Collect the href of every anchor carrying the file-link CSS classes.
file_anchors = soup.findAll(name='a', attrs={'class': 'js-navigation-open link-gray-dark'})
a_tags = [anchor.get('href') for anchor in file_anchors]

# The second-to-last link is taken as the newest report -- presumably the
# last listing entry is not a report CSV; TODO confirm against the page.
latest_report_name = os.path.basename(a_tags[-2])
repo_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/" + latest_report_name
repo_path
# The report is read straight from the URL built above. (An earlier variant,
# since disabled, picked the newest CSV from a local checkout via glob.)
daily_data_files = repo_path
latest_data = pd.read_csv(daily_data_files)
latest_data.head()

# Rows without a province/state fall back to the country name.
province_or_country = latest_data['Province_State'].fillna(latest_data['Country_Region'])
latest_data['Province_State'] = province_or_country

# Active = confirmed minus everyone who has died or recovered.
closed_cases = latest_data[['Deaths', 'Recovered']].sum(axis=1)
latest_data['Active'] = latest_data['Confirmed'] - closed_cases
# Country-level totals of the latest report. The columns are selected with a
# list: indexing a GroupBy with a bare tuple of names is rejected by current
# pandas. GroupBy.sum() already yields flat column names, so no MultiIndex
# level has to be dropped afterwards.
agg_latest_data = (
    latest_data.groupby('Country_Region')[['Confirmed', 'Deaths', 'Recovered', 'Active']]
    .sum()
    .reset_index()
)
agg_latest_data.head()

# Keep only countries where confirmed, deaths and recovered are all positive.
has_all_counts = (agg_latest_data[['Confirmed', 'Deaths', 'Recovered']] > 0).all(axis=1)
agg_latest_data = agg_latest_data.loc[has_all_counts, :].reset_index(drop=True)
# Hand-maintained fallback mapping (lower-cased country name -> code), used by
# get_country_code for names that neither CSV lookup table contains.
# NOTE(review): some territories map to another country's code (e.g. FRA,
# ITA) and 'PYC' does not look like a standard country code -- confirm that
# these are intended.
manual_dict = {
    'mainland china': 'CHN',
    'north macedonia': 'MKD',
    'palestine': 'PSE',
    'saint barthelemy': 'FRA',
    'south korea': 'KOR',
    'st. martin': 'PYC',
    'uk': 'GBR',
    'us': 'USA',
    'vatican city': 'ITA',
}
def get_country_code(series):
    """Map country names to country codes.

    Names are matched case-insensitively, first against the Plotly world-GDP
    table, then against the local tab-separated ``countryCodes.csv``, and
    finally against the module-level ``manual_dict``.

    Args:
        series: Iterable of country-name strings.

    Returns:
        A list with one entry per input name: the matched code, or ``None``
        when none of the three sources knows the name.
    """
    df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
    df2 = pd.read_csv('countryCodes.csv', sep='\t')

    def _name_to_code(frame):
        # In both tables the first column is the name and the third the code.
        # iloc makes the positional access explicit; integer keys on a
        # label-indexed row Series are deprecated in current pandas.
        return {
            name.lower(): code
            for name, code in zip(frame.iloc[:, 0], frame.iloc[:, 2])
        }

    # Later unpacked mappings win, so the lowest-priority source goes first.
    lookup = {**manual_dict, **_name_to_code(df2), **_name_to_code(df)}
    return [lookup.get(name.lower()) for name in series]
from scipy.stats import rankdata

# Attach the country code used as the map location key.
agg_latest_data['Code'] = get_country_code(agg_latest_data['Country_Region'])

# Colour by the rank of the active count divided by the number of countries;
# marker size uses a log scale with a +7 offset.
active_rank_share = rankdata(agg_latest_data['Active']) / agg_latest_data.shape[0]
marker_sizes = np.log(agg_latest_data['Active'] + 7)

fig = px.scatter_geo(
    agg_latest_data,
    locations='Code',
    color=active_rank_share,
    size=marker_sizes,
    hover_data=['Country_Region', 'Active'],
    projection="natural earth",
    color_continuous_scale='YlOrRd',
    title="Count of Active COVID-19 cases in each country",
)
fig.update_geos(resolution=110, showcountries=True, showcoastlines=False)
iplot(fig, filename='images/worldplot')
# Same colouring as the scatter map, drawn as filled country shapes: rank of
# the active count divided by the number of countries.
choropleth_colour = rankdata(agg_latest_data['Active']) / agg_latest_data.shape[0]

fig = px.choropleth(
    agg_latest_data,
    locations='Code',
    color=choropleth_colour,
    hover_data=['Country_Region', 'Active'],
    projection="natural earth",
    color_continuous_scale='YlOrRd',
    title="Count of Active COVID-19 cases in each country",
)
fig.update_geos(resolution=110, showcountries=True, showcoastlines=False)
iplot(fig, filename='images/worldplot')
Every day new people are infected, some of the infected recover, and some die. Let us look at the number of active cases on each day.
latest_data.head()

# Tile area uses a log scale with a +7 offset; the raw active count is shown
# on hover.
treemap_values = np.log(latest_data['Active'] + 7)
fig = px.treemap(
    latest_data,
    path=['Country_Region', 'Province_State'],
    values=treemap_values,
    hover_data=['Active'],
    title="Province/State wise distribution of Active COVID-19 cases in Each country",
)
fig.update_layout(width=2000, height=1500)
iplot(fig, filename='images/treemap_active.html')
# World-wide active cases per day: confirmed minus (recovered + deaths).
resolved_cases = world_cases_growth[['recovered', 'deaths']].sum(axis=1)
world_cases_growth['active_cases'] = world_cases_growth['Count'] - resolved_cases

fig = px.scatter(
    world_cases_growth,
    x='Date',
    y='active_cases',
    title='Daily count of active cases of COVID 19 through out the world',
)
iplot(fig, filename='active_cases')
The number of active cases roughly doubled within the span of one week: from 55K on 12-Mar to more than 120K on 18-Mar.
confirmed_cases
selected_countries = ['China', 'Italy', 'France', 'Spain', 'Germany', 'Iran',
                      'Korea, South', 'US', 'United Kingdom', 'Switzerland', 'India']
# Column positions to keep: the 'Country/Region' column (position 1) followed
# by every per-day column (position 4 onwards).
selected_dates = [1] + list(range(4, confirmed_cases.shape[1]))


def _selected_country_totals(frame):
    """Return per-day totals for the selected countries, one row per country."""
    view = frame.iloc[:, selected_dates]
    view = view.loc[view['Country/Region'].isin(selected_countries)]
    # GroupBy.sum() replaces agg([sum]) plus dropping the extra column level;
    # passing the builtin sum to agg() is deprecated in current pandas.
    return view.groupby('Country/Region').sum().reset_index(drop=False)


confirmed_cases_view = _selected_country_totals(confirmed_cases)
confirmed_cases_view
recovered_cases_view = _selected_country_totals(recovered_cases)
deaths_data_view = _selected_country_totals(deaths_data)

# Active = confirmed - (recovered + deaths), computed on the day columns only.
# After reset_index the rows are matched by position, so this relies on all
# three views listing the same countries in the same order.
active_data_view = confirmed_cases_view.iloc[:, 1:] - (
    recovered_cases_view.iloc[:, 1:] + deaths_data_view.iloc[:, 1:]
)
import plotly.graph_objects as go
# One line per selected country showing its active cases over time.
fig = go.Figure()
day_labels = confirmed_cases_view.columns[1:]
country_names = confirmed_cases_view.iloc[:, 0]
for row_position, country_name in enumerate(country_names):
    country_trace = go.Scatter(
        x=day_labels,
        y=active_data_view.iloc[row_position, :],
        name=country_name,
    )
    fig.add_trace(country_trace)
fig.update_layout(title="Spread of COVID-19 in last 60 days in major Countries",
                  height=800, width=1000)
iplot(fig, filename='country_spread')
The graph shows how well prepared each nation was for an epidemic: clearly Italy was not expecting such a massive outbreak. China, on the other hand, showed the world that with proper care this virus can be contained.
from ipywidgets import interact, widgets
from chart_studio.widgets import GraphWidget
# Country names in the same row order as confirmed_cases_view and
# active_data_view; used as the selectable options and to find a country's
# row position.
countries = confirmed_cases_view['Country/Region'].tolist()


@interact
def scatter_plot(country1=countries, country2=countries):
    """Plot the active-case curves of two countries on one figure.

    Args:
        country1: Name of the first country; must be an entry of ``countries``.
        country2: Name of the second country; must be an entry of ``countries``.
    """
    fig = go.Figure()
    day_labels = confirmed_cases_view.columns[1:]
    for country in (country1, country2):
        row_position = countries.index(country)
        fig.add_trace(go.Scatter(x=day_labels,
                                 y=active_data_view.iloc[row_position, :],
                                 name=country))
    iplot(fig)